# ----------------------------------------------------------------------
# ' This file processes medical claims data and produces an intermediate file
# ' of pain therapy claims
# ' by BK (bl11@indiana.edu)
# ----------------------------------------------------------------------

load_library = c('bit64','data.table','fst','future.apply','stringr','logger')
invisible(lapply(load_library, function(x) library(x, character.only=TRUE, quietly= TRUE)))
# Render numbers in fixed notation (never scientific) whenever they are
# converted to text; the numeric procedure codes further down are turned into
# character strings for matching.
options(scipen=999)

# Root directory of the project storage; auxiliary inputs are looked up below it.
bucket = file.path('/N','project','iuni_doctorshopping')

# read arguments
#   1. infile  : path of the fst file holding the medical claims
#   2. outfile : path of the file the matched claims are written to
# Any further arguments are ignored.
args = commandArgs(trailingOnly=TRUE)
if (length(args) < 2L) {
	# Fail with a usage hint instead of the bare "subscript out of bounds"
	# that args[[1]] / args[[2]] would raise.
	stop('expected two arguments: <infile> <outfile>', call.=FALSE)
}
infile = args[[1]]
outfile = args[[2]]

logger::log_info('now reading files : ', infile)

# read medical claims data
# Only the columns listed here are loaded from the fst file; the result is a
# data.table so the data.table merge method is used for the join later on.
claim_columns = c(
	'PATID','PAT_PLANID','FST_DT','OP_VISIT_ID',
	'ADMIT_CHAN','ADMIT_TYPE','ICD_FLAG','LOC_CD','NDC','POS','PROC_CD','PROCMOD',
	'BILL_PROV','SERVICE_PROV','REFER_PROV','PROV','PROVCAT','TOS_CD','TOS_EXT','UNITS','ALT_UNITS',
	'CLMID','CLMSEQ','CONF_ID','DSTATUS'
)
mdata = read_fst(infile, columns=claim_columns, as.data.table=TRUE)

# read pain therapy code
# Builds the lookup table of procedure codes that identify pain therapy,
# with one row per code and the code stored as a character string.
logger::log_info('now reading pain_cpt files')
pain_cpt = fread(file.path(bucket,'additional_data','pain_therapy','pain_therapy_CPT.csv'))

# Drop rows with a missing value in any column. This runs while `name` is still
# present, so rows lacking a name are removed too. Then discard `name` itself.
pain_cpt = na.omit(pain_cpt)
pain_cpt[,name := NULL]

# transform the code to character for matching
pain_cpt[,CPT_code := as.character(CPT_code)]

# Append the chiropractic codes 98940-98943.
# The range used to be written 98940:94943, which counts DOWN and added
# 3,998 unrelated codes (94943..98940), all labelled 'Chiropractic Massage'.
# fill=TRUE leaves any other columns of the CSV as NA for these rows.
pain_cpt = rbind(
	pain_cpt,
	data.table(procedure='Chiropractic Massage', CPT_code=as.character(98940:98943)),
	fill=TRUE
)

# Keep one row per code. This is done after appending so that a code present
# both in the CSV and in the appended range cannot duplicate claims in the
# join; the first occurrence (the CSV row) is the one retained.
pain_cpt = unique(pain_cpt, by='CPT_code')

# Inner join of the claims with the pain-therapy code table: a claim is kept
# only when its PROC_CD equals one of the CPT_code values, and claims without
# a match are dropped. The result is sorted on the join key.
pain_pat_data = merge(
	x = mdata,
	y = pain_cpt,
	by.x = 'PROC_CD',
	by.y = 'CPT_code',
	all = FALSE,
	sort = TRUE
)

# Write the matched claims to the path given as the second script argument.
fwrite(x = pain_pat_data, file = outfile)

